Importando bibliotecas que serão utilizadas

library(dplyr)
library(purrr)
library(stats)
library(plotly)
library(stringr)
library(janitor)
library(ggplot2)
source("./source/multiplot.R")

Carregando a base de dados de videogames e limpando-a com o script fornecido pela fonte

# Location of the raw PC games CSV; it is downloaded each time the script runs.
url <- "https://raw.githubusercontent.com/lizawood/apps-and-games/master/PC_Games/PCgames_2004_2018_raw.csv"

# Read the CSV and normalise the column names to snake_case.
# stringsAsFactors = FALSE is spelled out so text columns stay character on
# R < 4.0 as well; with factors, as.numeric(price) below would return the
# factor level codes instead of the parsed numbers.
raw_df <- url %>%
  read.csv(stringsAsFactors = FALSE) %>%
  janitor::clean_names()

# Derive numeric price, playtime (in minutes) and metascore columns, then drop
# the raw combined columns and give developer/publisher shorter names.
data <- raw_df %>%
  mutate(
    # Entries that are not plain numbers become NA (with a coercion warning).
    price = as.numeric(price),
    # playtime_median holds two space-separated tokens: the first is used as
    # the average playtime, the second (wrapped in parentheses) as the median.
    average_playtime = word(playtime_median, 1),
    median_playtime = word(playtime_median, 2),
    median_playtime = str_remove(median_playtime, "\\("),
    median_playtime = str_remove(median_playtime, "\\)"),
    # Convert to minutes: characters 1-2 are read as hours, 4-5 as minutes.
    # NOTE(review): assumes a zero-padded "HH:MM" layout; values with three or
    # more hour digits would not parse correctly -- confirm against the data.
    average_playtime = 60 * as.numeric(str_sub(average_playtime, 1, 2)) +
      as.numeric(str_sub(average_playtime, 4, 5)),
    median_playtime = 60 * as.numeric(str_sub(median_playtime, 1, 2)) +
      as.numeric(str_sub(median_playtime, 4, 5)),
    # Takes the 4th- and 3rd-from-last characters of the combined score field;
    # presumably the two-digit metascore -- verify against the raw column.
    metascore = as.double(
      str_sub(score_rank_userscore_metascore, start = -4, end = -3)
    )
  ) %>%
  select(-score_rank_userscore_metascore, -playtime_median) %>%
  rename(publisher = publisher_s, developer = developer_s)

Filtrando desenvolvedoras com pelo menos 20 jogos

# Number of games per developer, keeping only developers with at least 20.
count_dev <- data %>%
  # read.csv() reads blank cells in text columns as "" rather than NA, so
  # games without a developer must be excluded explicitly; otherwise they
  # would be counted together as one large "developer" group.
  dplyr::filter(!is.na(developer), developer != "") %>%
  dplyr::group_by(developer) %>%
  dplyr::summarise(cont = dplyr::n()) %>%
  dplyr::filter(cont >= 20)

# Restrict the data set to the games of those developers.
data <- data %>%
  dplyr::filter(developer %in% count_dev$developer)

Plotando o preço médio dos jogos dos 10 desenvolvedores com maior tempo médio de jogo (em horas)

# Mean price and mean playtime per developer, keeping the 10 developers with
# the highest mean playtime.
developers_price <- data %>%
  dplyr::group_by(developer) %>%
  dplyr::summarise(
    # na.rm = TRUE: a single game with an unparsed (NA) price or playtime
    # would otherwise turn the developer's whole mean into NA and silently
    # remove it from the ranking or the chart.
    price = round(mean(price, na.rm = TRUE), 2),
    # average_playtime is in minutes; divide by 60 to express it in hours.
    playtime = mean(average_playtime, na.rm = TRUE) / 60
  ) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(developer = as.factor(developer)) %>%
  dplyr::arrange(desc(playtime)) %>%
  dplyr::slice(1:10)

# Bar chart with one bar per developer showing its mean price.
p <- developers_price %>%
  ggplot(aes(x = developer, y = price, fill = developer)) +
  geom_bar(colour = "black", stat = "identity") +
  labs(
    title = "Preço jogos - Top 10 desenvolvedores horas jogadas",
    y = "Preço U$",
    x = "Desenvolvedores"
  )

# Render the ggplot object as an interactive plotly widget.
ggplotly(p)

Evolução do preço médio (US$) dos jogos dos 10 desenvolvedores com maior tempo médio de jogo ao longo dos anos

# Mean price and mean playtime per developer and release year, restricted to
# the developers present in developers_price.
developers_price_evolution <- data %>%
  # The release year is the first run of four digits found in release_date;
  # rows without one get an NA year and form their own group.
  dplyr::mutate(year = stringr::str_extract(release_date, "\\d{4}")) %>%
  dplyr::group_by(developer, year) %>%
  dplyr::summarise(
    # na.rm = TRUE keeps one game with an NA price or playtime from turning
    # the whole developer/year mean into NA.
    price = round(mean(price, na.rm = TRUE), 2),
    # average_playtime is in minutes; divide by 60 to express it in hours.
    playtime = mean(average_playtime, na.rm = TRUE) / 60
  ) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(
    developer = as.factor(developer),
    year = as.integer(year)
  ) %>%
  dplyr::filter(developer %in% developers_price$developer)